#!/usr/bin/env perl
use strict;
use warnings;
use feature 'say';
use autodie;

use FindBin '$RealBin';
use lib "$RealBin/lib";

use Vnlog::Util qw(parse_options read_and_preparse_input ensure_all_legends_equivalent reconstruct_substituted_command);



# The option list mirrors the struct option longopts in uniq.c in GNU coreutils,
# followed by the options that only this wrapper knows about
my @specs =
  (
   # switches that take no argument
   qw( c|count
       d|repeated
       i|ignore-case
       u|unique
       zero-terminated|z ),

   # options that take an argument (":s" marks the argument as optional)
   qw( all-repeated|D:s
       f|skip-fields=i
       s|skip-chars=i
       check-chars|w=i
       group:s ),

   # vnlog-specific additions
   qw( vnl-count=s
       vnl-tool=s
       help )
  );


# Options we parse but refuse to honor. Each key is an option name as it appears
# in the parsed options; each value is the explanation reported to the user
my %options_unsupported =
  ( 'zero-terminated' => "vnlog is built on assuming a particular record separator\n" );

# The usage message handed to parse_options(). $0 is interpolated, so the
# message names the script as it was actually invoked
my $usage = <<EOF;
  $0 [options] < logfile

A very common usage takes no options: suppresses duplicate consecutive records.

The most common options are (from the GNU uniq manpage):

  -c, --count
         prefix lines by the number of occurrences

  -d, --repeated
         only print duplicate lines, one for each group

  -D     print all duplicate lines

  --all-repeated[=METHOD]
         like  -D,  but  allow  separating  groups with an empty line;
         METHOD={none(default),prepend,separate}

  -f, --skip-fields=N
         avoid comparing the first N fields (vnl-uniq extension: N<0 avoids
         comparing all but the LAST -N fields. To use ONLY the one last field,
         pass -f -1 or --skip-fields=-1)

  --group[=METHOD]
         show  all  items,  separating  groups  with  an  empty  line;
         METHOD={separate(default),prepend,append,both}

  -i, --ignore-case
         ignore differences in case when comparing

  -u, --unique
         only print unique lines

There's also vnlog-specific options

  --vnl-count NAME
         Implies -c. Like -c, adds a column of occurrence counts, but gives this
         column an arbitrary name given on the commandline

  --vnl-tool tool
       Specifies the path to the tool we're wrapping. By default we wrap 'uniq',
       so most people can omit this
EOF

# Split the commandline into the input filename(s) and a hash of parsed options.
# NOTE(review): the third argument looks like the number of non-option,
# non-filename arguments this tool expects (none) -- confirm against Vnlog::Util
my ($filenames,$options) = parse_options(\@ARGV, \@specs, 0, $usage);

# Wrap plain 'uniq' unless the user pointed us at some other tool
$options->{'vnl-tool'} = 'uniq' unless defined $options->{'vnl-tool'};

# --vnl-count NAME implies -c. I test for a non-empty string, NOT for
# truthiness, so that a count column named "0" is honored
my $count_name      = $options->{'vnl-count'};
my $have_count_name = defined($count_name) && length($count_name) > 0;
if( $have_count_name )
{
    $options->{c} = 1;
}

# -D/--all-repeated takes an optional METHOD. An empty string means that no
# METHOD was given; anything else must be one of the methods uniq knows about
my $all_repeated = $options->{'all-repeated'};
if( defined $all_repeated     &&
    length($all_repeated) > 0 &&
    !( grep { $all_repeated eq $_ } ('none', 'prepend', 'separate') ) )
{
    die "--all-repeated=XXX was given. XXX may be ONLY one of 'none','prepend','separate'. Got '" . $all_repeated . "'";
}

# --group takes an optional METHOD too, and it cannot be combined with the
# options that select which records are printed
my $group = $options->{'group'};
if( defined $group )
{
    if( length($group) > 0 &&
        !( grep { $group eq $_ } ('separate', 'prepend', 'append', 'both') ) )
    {
        die "--group=XXX was given. XXX may be ONLY one of 'separate','prepend','append','both'. Got '" . $group . "'";
    }

    if( $options->{c}         ||
        $options->{d}         ||
        defined $all_repeated ||
        $options->{u} )
    {
        # The underlying uniq tool dictates this
        die "--group is mutually exclusive with -c/-d/-D/-u";
    }
}

# Exactly one input is processed.
# NOTE(review): this presumably relies on parse_options() supplying a default
# input when no file is named on the commandline -- confirm against Vnlog::Util
if(@$filenames != 1)
{
    die "At most one file may be given on the commandline: the input";
}

# Reject the options that we parse, but cannot honor
for my $key(keys %$options)
{
    if($options_unsupported{$key})
    {
        my $keyname = length($key) == 1 ? "-$key" : "--$key";
        die("I don't support $keyname: $options_unsupported{$key}");
    }
}

my $inputs = read_and_preparse_input($filenames);

if( defined $options->{'f'} &&
    $options->{'f'} < 0 )
{
    # I want to use only the last -N fields, so I skip the first Nfields-N
    # fields
    my $Nfields = @{$inputs->[0]{keys}};
    my $Nskip   = $Nfields + $options->{'f'};
    if( $Nskip < 0 )
    {
        # uniq would reject a negative skip count anyway, but with a message
        # that doesn't match what the user typed, and only AFTER I printed the
        # legend. So I complain here, in the user's terms
        die "-f " . $options->{'f'} . " asks to compare the last " . (-$options->{'f'}) .
            " fields, but the input has only $Nfields fields\n";
    }
    $options->{'f'} = $Nskip;
}

my $ARGV_new = reconstruct_substituted_command($inputs, $options, [], \@specs);

# The legend MUST reach stdout before the wrapped tool writes anything. perl
# tries to flush before an exec, but perlfunc warns that this isn't guaranteed
# everywhere, so I turn on autoflush explicitly
$| = 1;

# uniq -c prepends the count to each record, so the count column leads the
# legend as well
my $legend = '# ';
if($options->{c})
{
    $legend .= ($have_count_name ? $count_name : 'count') . ' ';
}
$legend .= join(' ', @{$inputs->[0]{keys}});
say $legend;

# Hand the process over to the wrapped tool. The indirect-object form never goes
# through the shell, no matter how many arguments there are. exec returns only
# on failure, and autodie doesn't cover exec by default, so I report the
# failure myself and exit non-zero
my $tool = $options->{'vnl-tool'};
{
    # I produce my own diagnostic below; no need for perl's one as well
    no warnings 'exec';
    exec { $tool } $tool, @$ARGV_new
      or die "Couldn't exec '$tool': $!\n";
}



__END__

=head1 NAME

vnl-uniq - uniq a log file, preserving the legend

=head1 SYNOPSIS

 $ cat sizes-colors.vnl
 # size  color
 12     blue
 11     yellow
 13     yellow
 53     blue
 34     yellow
 22     orange
 11     orange

 $ < sizes-colors.vnl vnl-filter -p color | vnl-sort -k color | vnl-uniq -c
 # count color
       2 blue
       2 orange
       3 yellow

 $ < sizes-colors.vnl vnl-sort -k color | vnl-uniq --group -f -1
 # size color
 12     blue
 53     blue

 11     orange
 22     orange

 11     yellow
 13     yellow
 34     yellow

=head1 DESCRIPTION

  Usage: vnl-uniq [options] < logfile

This tool runs C<uniq> on a given vnlog dataset. C<vnl-uniq> is a wrapper around
the GNU coreutils C<uniq> tool. Since this is a wrapper, most commandline
options and behaviors of the C<uniq> tool are present; consult the L<uniq(1)>
manpage for detail. The differences from GNU coreutils C<uniq> are

=over

=item *

The input and output to this tool are vnlog files, complete with a legend

=item *

C<--zero-terminated> is not supported because vnlog assumes
newline-separated records

=item *

Only I<one> input is supported (a file on the cmdline or data on standard
input), and the output I<always> goes to standard output. Specifying the output
as a file on the commandline is not supported.

=item *

C<--vnl-count NAME> can be given to name the C<count> column. C<-c> is still
supported to add the default new column named C<count>, but if another name is
wanted, C<--vnl-count> does that. C<--vnl-count> implies C<-c>

=item *

In addition to the normal behavior of skipping fields at the start, C<-f> and
C<--skip-fields> can take a negative argument C<-N> to skip I<all but the last>
N fields. For instance, to use only the one last field, pass C<-f -1> or
C<--skip-fields=-1>.

=item *

By default we call the C<uniq> tool to do the actual work. If the underlying
tool has a different name or lives in an odd path, this can be specified by
passing C<--vnl-tool TOOL>

=back

Past that, everything C<uniq> does is supported, so see that man page for
detailed documentation.

=head1 COMPATIBILITY

I use GNU/Linux-based systems exclusively, but everything has been tested
functional on FreeBSD and OSX in addition to Debian, Ubuntu and CentOS. I can
imagine there's something I missed when testing on non-Linux systems, so please
let me know if you find any issues.

=head1 SEE ALSO

L<uniq(1)>

=head1 REPOSITORY

https://github.com/dkogan/vnlog/

=head1 AUTHOR

Dima Kogan C<< <dima@secretsauce.net> >>

=head1 LICENSE AND COPYRIGHT

Copyright 2019 Dima Kogan C<< <dima@secretsauce.net> >>

This library is free software; you can redistribute it and/or modify it under
the terms of the GNU Lesser General Public License as published by the Free
Software Foundation; either version 2.1 of the License, or (at your option) any
later version.

=cut
